# =============================================================================
# BOIX & SVOLIK (2013) REPLICATION WITH ROBUSTNESS ANALYSIS
# Survival analysis using Weibull models with jackknife and bootstrap robustness
# Author: [Your Name]
# Last modified: [Date]
# =============================================================================

# --- PACKAGES ----------------------------------------------------------------
library(dplyr)
library(survival)
library(flexsurv)
library(survminer)
library(tidyverse)
library(matrixStats)
library(haven)
library(xtable)
library(texreg)

# --- CONFIGURATION -----------------------------------------------------------
set.seed(123)  # For reproducibility
n_bootstrap <- 1000

# --- DATA LOADING ------------------------------------------------------------
# Load original Boix & Svolik leader data
data <- read_dta("leader_tvc_2.dta", encoding = "latin1") %>%
  mutate(COWcode = ccode)

# Load latent estimates
latent <- read.csv("estimates_independent.csv")

# --- DATA PREPARATION --------------------------------------------------------
# Deduplicate latent estimates (one observation per country-year)
latent_unique <- latent %>%
  distinct(COWcode, year, .keep_all = TRUE)

# Build master leader-year dataframe with lags
df_master <- data %>%
  inner_join(latent_unique, by = c("COWcode", "year")) %>%
  arrange(COWcode, year, leadid) %>%
  group_by(COWcode) %>%
  mutate(
    dyn.estimates_lag = lag(dyn.estimates, 1),
    dyn.up_lag        = lag(dyn.up, 1),
    dyn.lo_lag        = lag(dyn.lo, 1)
  ) %>%
  ungroup() %>%
  group_by(leadid) %>%
  mutate(time0 = lag(time, default = 0)) %>%
  ungroup()

# Define complete set of covariates used across all models
all_covs <- c(
  # Timing and event indicators
  "time0", "time", "c_coup", "c_natural", "c_revolt",
  # Original model predictors
  "legislature", "leg_growth_2",
  # New model predictor
  "dyn.estimates_lag",
  # Control variables
  "gdp_1k", "chgdpen_fearonlaitin", "Oil_fearonlaitin",
  "postcoldwarlag", "civiliandictatorshiplag", "militarydictatorshiplag",
  "communist", "lpopl1_fearonlaitin", "ethfrac_fearonlaitin",
  "relfrac_fearonlaitin", "age"
)

# Complete case analysis (drop any row with missing values)
df_cc <- df_master %>%
  filter(complete.cases(select(., all_of(all_covs))))

# --- HELPER FUNCTIONS --------------------------------------------------------
# Jackknife function for survival models
jackknife_flexsurv <- function(surv_formula, data, dist = "weibull") {
  n <- nrow(data)
  results <- numeric(n)
  
  for (i in seq_len(n)) {
    temp_data <- data[-i, ]
    model <- flexsurvreg(surv_formula, data = temp_data, dist = dist)
    results[i] <- model$res["dyn.estimates_lag", "est"]
  }
  
  return(results)
}

# Function to compare jackknife results with full-sample coefficient
compare_coefficients_survival <- function(coef_vector, original_coef, model_name) {
  h <- hist(coef_vector, main = paste(model_name),
            xlab = "Coefficient Estimate",
            col = "gray", border = "white", breaks = 20)
  
  abline(v = original_coef, col = "blue", lwd = 2, lty = 2)
  
  y_max <- max(h$counts)
  text(x = original_coef, y = y_max * 0.9, 
       labels = paste0("Full Sample:\n", round(original_coef, 4)),
       col = "blue", pos = 4, cex = 0.8)
  
  cat(paste0("\n", model_name, ":\n"))
  cat("Full-sample coefficient: ", round(original_coef, 4), "\n")
  cat("Min: ", round(min(coef_vector), 4), "\n")
  cat("Max: ", round(max(coef_vector), 4), "\n")
  cat("Range: ", paste(round(range(coef_vector), 4), collapse = " - "), "\n")
}

# --- ANALYSIS ----------------------------------------------------------------
# Fit Weibull survival models - Original specifications
fit_coups_restr <- flexsurvreg(
  Surv(time0, time, c_coup) ~ legislature + leg_growth_2 + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_natural_restr <- flexsurvreg(
  Surv(time0, time, c_natural) ~ legislature + leg_growth_2 + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_revolt_restr <- flexsurvreg(
  Surv(time0, time, c_revolt) ~ legislature + leg_growth_2 + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

# Fit Weibull survival models - New latent measure specifications
fit_coups_new <- flexsurvreg(
  Surv(time0, time, c_coup) ~ dyn.estimates_lag + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_natural_new <- flexsurvreg(
  Surv(time0, time, c_natural) ~ dyn.estimates_lag + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_revolt_new <- flexsurvreg(
  Surv(time0, time, c_revolt) ~ dyn.estimates_lag + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

# Display main results
texreg(list(
  fit_natural_restr, fit_natural_new, 
  fit_coups_restr, fit_coups_new, 
  fit_revolt_restr, fit_revolt_new
))

# --- ROBUSTNESS ANALYSIS -----------------------------------------------------
# Define survival formulas for robustness analysis
surv_formula_coup_new <- Surv(time0, time, c_coup) ~ dyn.estimates_lag +
  gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
  postcoldwarlag + civiliandictatorshiplag +
  militarydictatorshiplag + communist +
  lpopl1_fearonlaitin + ethfrac_fearonlaitin +
  relfrac_fearonlaitin + age

surv_formula_natural_new <- Surv(time0, time, c_natural) ~ dyn.estimates_lag +
  gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
  postcoldwarlag + civiliandictatorshiplag +
  militarydictatorshiplag + communist +
  lpopl1_fearonlaitin + ethfrac_fearonlaitin +
  relfrac_fearonlaitin + age

surv_formula_revolt_new <- Surv(time0, time, c_revolt) ~ dyn.estimates_lag +
  gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
  postcoldwarlag + civiliandictatorshiplag +
  militarydictatorshiplag + communist +
  lpopl1_fearonlaitin + ethfrac_fearonlaitin +
  relfrac_fearonlaitin + age

# Run jackknife analysis
cat("\n=== JACKKNIFE ANALYSIS ===\n")
coef_coup    <- jackknife_flexsurv(surv_formula_coup_new, df_cc)
coef_natural <- jackknife_flexsurv(surv_formula_natural_new, df_cc)
coef_revolt  <- jackknife_flexsurv(surv_formula_revolt_new, df_cc)

# Extract full-sample coefficients for comparison
orig_coef_coup    <- fit_coups_new$res["dyn.estimates_lag", "est"]
orig_coef_natural <- fit_natural_new$res["dyn.estimates_lag", "est"]
orig_coef_revolt  <- fit_revolt_new$res["dyn.estimates_lag", "est"]

# --- VISUALIZATION -----------------------------------------------------------
# Plot jackknife distributions
cat("\n=== JACKKNIFE DISTRIBUTIONS ===\n")
par(mfrow = c(1, 3))
compare_coefficients_survival(coef_coup, orig_coef_coup, "Coups")
compare_coefficients_survival(coef_natural, orig_coef_natural, "Natural")
compare_coefficients_survival(coef_revolt, orig_coef_revolt, "Revolts")

# Reset plotting layout
par(mfrow = c(1, 1))